"""
/*
 * Copyright 2011 OpenWAF.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
 """
import HTMLTokenizer
import sys

class HTMLView:
    """Plain container for a view: a root node reference plus child nodes."""

    def __init__(self):
        # Both fields start empty; the list is created per instance so two
        # views never share the same children.
        self.rootNode, self.childNodes = None, []
class HTMLViewNode:
    """One node of the parsed view tree.

    ``str(node)`` renders a ``@ViewElement`` field declaration for every node
    in the subtree (the node itself first, then its children in order) whose
    ``id`` is set and whose ``name`` starts with ``"WAF"``.
    """

    # Node kinds stored in ``nodeType``.
    NODE_ELEMENT = 1
    NODE_DATA = 2
    NODE_COMMENT = 3
    NODE_DECL = 4

    def __init__(self):
        """Create an empty element node with no position information."""
        self.id = None            # used as the field name by __str__
        self.name = ""            # tag name; empty for unnamed nodes
        self.attributes = []      # attribute entries attached to this node
        self.innerHTML = None     # raw text carried by the node, if any
        self.childNodes = []      # child HTMLViewNode objects, in order
        self.lineno = -1          # -1 means "position not set"
        self.offset = -1
        self.nodeType = HTMLViewNode.NODE_ELEMENT

    def __str__(self):
        """Return the concatenated field declarations of this subtree.

        A node contributes ``"\\t@ViewElement\\n\\tprivate <type> <id>;"``
        only when ``id`` is not None and ``name`` starts with "WAF"; the
        first four characters of the name are dropped to obtain ``<type>``
        (presumably a "WAF:"-style prefix -- confirm against the templates).
        Pieces are joined with no separator; nodes that do not qualify
        contribute nothing themselves but their children are still visited.
        """
        pieces = []
        if (self.id is not None
                and self.name is not None
                and self.name.startswith("WAF")):
            pieces.append(
                "\t@ViewElement\n\tprivate " + self.name[4:] + " " + self.id + ";"
            )
        pieces.extend(str(child) for child in self.childNodes)
        return "".join(pieces)
class WAFHTMLParser():
    """Build a tree of HTMLViewNode objects from the tokens of one file.

    The constructor tokenizes ``filepath`` with ``HTMLTokenizer.tokenize`` and
    parses the tokens immediately; the resulting tree hangs off ``rootNode``,
    an unnamed HTMLViewNode. Tokens are expected to expose ``data``,
    ``lineno`` and ``pos`` attributes.
    """

    def __init__(self, filepath, skip_whitespace):
        """Tokenize and parse ``filepath``.

        filepath        -- passed to HTMLTokenizer.tokenize and quoted in
                           error messages.
        skip_whitespace -- when true, whitespace-only text tokens between
                           tags do not become data nodes.

        Raises Exception for a mismatched closing tag or a closing tag that
        is not terminated by ">", and IndexError when the input ends in the
        middle of a tag.
        """
        self.filepath = filepath
        self.curNode = HTMLViewNode()
        self.rootNode = self.curNode
        # The root is pushed once up front, so the stack is never empty
        # while a matching closing tag is being popped.
        self.nodeStack = [self.curNode]
        self.skip_whitespace = skip_whitespace
        # The tokenizer receives the negation of skip_whitespace; what that
        # flag means is defined by HTMLTokenizer (not visible here).
        self.ts = HTMLTokenizer.tokenize(filepath, not skip_whitespace)
        self.max_count = len(self.ts)
        self.parse()

    def parse(self):
        """Walk ``self.ts`` once and attach nodes below ``self.rootNode``.

        NOTE(review): elements still open when the tokens run out are
        accepted silently; only closing tags are checked for a match.
        """
        total = len(self.ts)
        pos = 0
        while pos < total:
            tok = self.ts[pos]
            data = tok.data
            if data.startswith("<%") or data.startswith("<!"):
                # "<%...": and "<!..." tokens are stored verbatim as comments.
                self._add_leaf(tok, HTMLViewNode.NODE_COMMENT)
                pos += 1
            elif data == "</":
                pos = self._parse_close_tag(pos)
            elif data == "<":
                pos = self._parse_open_tag(pos)
            elif self.skip_whitespace and not data.strip():
                pos += 1
            else:
                self._add_leaf(tok, HTMLViewNode.NODE_DATA)
                pos += 1

    def _significant(self, index, opener):
        """Return ``(index, token)`` of the first non-blank token at or after
        ``index``; raise IndexError if the tokens end first.

        ``opener`` is the token that started the tag being parsed and is
        only used to report a position in the error message.
        """
        tokens = self.ts
        limit = len(tokens)
        while index < limit:
            tok = tokens[index]
            if tok.data.strip():
                return index, tok
            index += 1
        # Same exception type as the unchecked list access this replaces,
        # but with enough context to locate the broken tag.
        raise IndexError(
            "Unexpected end of input inside tag starting at ("
            + str(opener.lineno) + "," + str(opener.pos) + ") in "
            + str(self.filepath)
        )

    def _add_leaf(self, tok, node_type):
        """Append a childless node of ``node_type`` carrying ``tok.data``."""
        leaf = HTMLViewNode()
        leaf.lineno = tok.lineno
        leaf.offset = tok.pos
        leaf.nodeType = node_type
        leaf.innerHTML = tok.data
        self.curNode.childNodes.append(leaf)

    def _parse_close_tag(self, pos):
        """Handle ``</ name >`` starting at ``pos``; return the next index."""
        opener = self.ts[pos]
        pos, tok = self._significant(pos + 1, opener)
        if tok.data.lower() != self.curNode.name.lower():
            rule = "\n===================================="
            message = "".join([
                rule,
                rule,
                "\nMatching tag not found for tag " + self.curNode.name
                + "(" + str(self.curNode.lineno) + ","
                + str(self.curNode.offset) + ")\n",
                rule,
                "\nError Details ",
                rule,
                "\nFilepath:" + self.filepath,
                "\nLine no :" + str(tok.lineno),
                "\nOffset  :" + str(tok.pos),
                rule,
            ])
            raise Exception(message)

        # The stack holds the parent that was current when the element was
        # opened; popping makes that parent current again.
        self.curNode = self.nodeStack.pop()
        pos, tok = self._significant(pos + 1, opener)
        if tok.data != ">":
            raise Exception("Expecting > at " + str(tok.lineno) + " " + str(tok.pos))
        return pos + 1

    def _parse_open_tag(self, pos):
        """Handle ``< name attr[=value]... >`` or ``.../>`` starting at
        ``pos``; return the index just past the terminator."""
        opener = self.ts[pos]
        node = HTMLViewNode()
        node.lineno = opener.lineno
        node.offset = opener.pos

        pos, tok = self._significant(pos + 1, opener)
        node.name = tok.data

        pos, tok = self._significant(pos + 1, opener)
        while tok.data not in ("/>", ">"):
            attr_name = tok.data.strip()
            pos, tok = self._significant(pos + 1, opener)
            if tok.data == "=":
                pos, tok = self._significant(pos + 1, opener)
                node.attributes.append([attr_name, tok.data.strip()])
                pos, tok = self._significant(pos + 1, opener)
            else:
                # Valueless attribute: the token just read is already the
                # next attribute name or the tag terminator.
                node.attributes.append([attr_name, None])

        self.curNode.childNodes.append(node)
        # A tag ended with ">" becomes the new current node and expects a
        # closing tag; "/>" tags and "meta" never get children.
        # NOTE(review): "meta" is the only tag exempted here.
        if tok.data == ">" and node.name.lower() != "meta":
            self.nodeStack.append(self.curNode)
            self.curNode = node
        return pos + 1
             
        
if __name__=="__main__":
    # Command-line entry point: parse the file named by the first argument
    # (whitespace-only text skipped) and print the rendered root node.
    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " <html-file>")
    h=WAFHTMLParser(sys.argv[1],True)
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(h.rootNode)